zink: implement streamout and xfb handling in ntv

this translates streamout info into xfb decorations and adds some workaround
handling for spurious gl_PointSize values

partly based on patches originally written by Dave Airlie <airlied@redhat.com>

Reviewed-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5163>
This commit is contained in:
Mike Blumenkrantz 2020-06-01 14:49:44 -04:00 committed by Marge Bot
parent c3f6a59d57
commit 1b130c42b8
5 changed files with 241 additions and 8 deletions

View File

@ -111,6 +111,10 @@ struct ntv_context {
size_t num_regs;
struct hash_table *vars; /* nir_variable -> SpvId */
struct hash_table *so_outputs; /* pipe_stream_output -> SpvId */
unsigned outputs[VARYING_SLOT_MAX];
const struct glsl_type *so_output_gl_types[VARYING_SLOT_MAX];
SpvId so_output_types[VARYING_SLOT_MAX];
const SpvId *block_ids;
size_t num_blocks;
@ -353,7 +357,6 @@ emit_output(struct ntv_context *ctx, struct nir_variable *var)
if (ctx->stage == MESA_SHADER_VERTEX) {
unsigned slot = var->data.location;
switch (slot) {
HANDLE_EMIT_BUILTIN(POS, Position);
@ -368,6 +371,10 @@ emit_output(struct ntv_context *ctx, struct nir_variable *var)
case VARYING_SLOT_CLIP_DIST0:
assert(glsl_type_is_array(var->type));
spirv_builder_emit_builtin(&ctx->builder, var_id, SpvBuiltInClipDistance);
/* this can be as large as 2x vec4, which requires 2 slots */
ctx->outputs[VARYING_SLOT_CLIP_DIST1] = var_id;
ctx->so_output_gl_types[VARYING_SLOT_CLIP_DIST1] = var->type;
ctx->so_output_types[VARYING_SLOT_CLIP_DIST1] = var_type;
break;
default:
@ -383,6 +390,9 @@ emit_output(struct ntv_context *ctx, struct nir_variable *var)
* use driver_location for non-builtins with defined slots to avoid overlap
*/
}
ctx->outputs[var->data.location] = var_id;
ctx->so_output_gl_types[var->data.location] = var->type;
ctx->so_output_types[var->data.location] = var_type;
} else if (ctx->stage == MESA_SHADER_FRAGMENT) {
if (var->data.location >= FRAG_RESULT_DATA0)
spirv_builder_emit_location(&ctx->builder, var_id,
@ -823,6 +833,167 @@ emit_unop(struct ntv_context *ctx, SpvOp op, SpvId type, SpvId src)
return spirv_builder_emit_unop(&ctx->builder, op, type, src);
}
/* return the intended xfb output vec type based on base type and vector size */
static SpvId
get_output_type(struct ntv_context *ctx, unsigned register_index, unsigned num_components)
{
const struct glsl_type *out_type = ctx->so_output_gl_types[register_index];
enum glsl_base_type base_type = glsl_get_base_type(out_type);
if (base_type == GLSL_TYPE_ARRAY)
base_type = glsl_get_base_type(glsl_without_array(out_type));
switch (base_type) {
case GLSL_TYPE_BOOL:
return get_bvec_type(ctx, num_components);
case GLSL_TYPE_FLOAT:
return get_fvec_type(ctx, 32, num_components);
case GLSL_TYPE_INT:
return get_ivec_type(ctx, 32, num_components);
case GLSL_TYPE_UINT:
return get_uvec_type(ctx, 32, num_components);
default:
break;
}
unreachable("unknown type");
return 0;
}
/* Streamout can capture individual components of an output, so the packed
 * outputs created by emit_output() can't be reused for it. Instead, emit one
 * dedicated xfb output variable per stream output entry, decorate it, and
 * record it in ctx->so_outputs so the stores can be emitted later.
 *
 * max_output_location: caller-derived upper bound of the written output
 *                      slots, used to place the new variables past any
 *                      existing user outputs
 * so_info:             original stream output info; only stride[] is read
 * local_so_info:       entries to emit; register_index is used to index the
 *                      per-slot arrays in ctx
 */
static void
emit_so_info(struct ntv_context *ctx, unsigned max_output_location,
             const struct pipe_stream_output_info *so_info, struct pipe_stream_output_info *local_so_info)
{
   for (unsigned i = 0; i < local_so_info->num_outputs; i++) {
      struct pipe_stream_output so_output = local_so_info->output[i];
      SpvId out_type = get_output_type(ctx, so_output.register_index, so_output.num_components);
      SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder,
                                                      SpvStorageClassOutput,
                                                      out_type);
      SpvId var_id = spirv_builder_emit_var(&ctx->builder, pointer_type,
                                            SpvStorageClassOutput);

      /* i is unsigned, so format it with %u; size the write from the buffer */
      char name[10];
      snprintf(name, sizeof(name), "xfb%u", i);
      spirv_builder_emit_name(&ctx->builder, var_id, name);

      /* NOTE(review): the * 4 presumably converts dword units to the byte
       * units the decorations take -- confirm against the gallium docs
       */
      spirv_builder_emit_offset(&ctx->builder, var_id, (so_output.dst_offset * 4));
      spirv_builder_emit_xfb_buffer(&ctx->builder, var_id, so_output.output_buffer);
      spirv_builder_emit_xfb_stride(&ctx->builder, var_id, so_info->stride[so_output.output_buffer] * 4);

      /* output location is incremented by VARYING_SLOT_VAR0 for non-builtins in vtn,
       * so we need to ensure that the new xfb location slot doesn't conflict with any previously-emitted
       * outputs.
       *
       * if there are no previous outputs that take up user slots (VAR0+) then we can start right after the
       * glsl builtin reserved slots, otherwise we start just after the adjusted user output slot
       */
      uint32_t location = NTV_MIN_RESERVED_SLOTS + i;
      if (max_output_location >= VARYING_SLOT_VAR0)
         location = max_output_location - VARYING_SLOT_VAR0 + 1 + i;
      assert(location < VARYING_SLOT_VAR0);
      spirv_builder_emit_location(&ctx->builder, var_id, location);

      /* note: gl_ClipDistance[4] can be the 0-indexed member of VARYING_SLOT_CLIP_DIST1 here,
       * so this is still the 0 component
       */
      if (so_output.start_component)
         spirv_builder_emit_component(&ctx->builder, var_id, so_output.start_component);

      /* Parent the key to the hash table itself: the table is destroyed
       * without a per-entry delete callback, so a key allocated with a NULL
       * ralloc context would never be freed.
       */
      uint32_t *key = ralloc_size(ctx->so_outputs, sizeof(uint32_t));
      *key = (uint32_t)so_output.register_index << 2 | so_output.start_component;
      _mesa_hash_table_insert(ctx->so_outputs, key, (void *)(intptr_t)var_id);

      /* the new variable must be listed in the entry point's interface */
      assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces));
      ctx->entry_ifaces[ctx->num_entry_ifaces++] = var_id;
   }
}
static void
emit_so_outputs(struct ntv_context *ctx,
const struct pipe_stream_output_info *so_info, struct pipe_stream_output_info *local_so_info)
{
SpvId loaded_outputs[VARYING_SLOT_MAX] = {};
for (unsigned i = 0; i < local_so_info->num_outputs; i++) {
uint32_t components[NIR_MAX_VEC_COMPONENTS];
struct pipe_stream_output so_output = local_so_info->output[i];
uint32_t so_key = (uint32_t) so_output.register_index << 2 | so_output.start_component;
struct hash_entry *he = _mesa_hash_table_search(ctx->so_outputs, &so_key);
assert(he);
SpvId so_output_var_id = (SpvId)(intptr_t)he->data;
SpvId type = get_output_type(ctx, so_output.register_index, so_output.num_components);
SpvId output = ctx->outputs[so_output.register_index];
SpvId output_type = ctx->so_output_types[so_output.register_index];
const struct glsl_type *out_type = ctx->so_output_gl_types[so_output.register_index];
if (!loaded_outputs[so_output.register_index])
loaded_outputs[so_output.register_index] = spirv_builder_emit_load(&ctx->builder, output_type, output);
SpvId src = loaded_outputs[so_output.register_index];
SpvId result;
for (unsigned c = 0; c < so_output.num_components; c++) {
components[c] = so_output.start_component + c;
/* this is the second half of a 2 * vec4 array */
if (ctx->stage == MESA_SHADER_VERTEX && so_output.register_index == VARYING_SLOT_CLIP_DIST1)
components[c] += 4;
}
/* if we're emitting a scalar or the type we're emitting matches the output's original type and we're
* emitting the same number of components, then we can skip any sort of conversion here
*/
if (glsl_type_is_scalar(out_type) || (type == output_type && glsl_get_length(out_type) == so_output.num_components))
result = src;
else {
if (ctx->stage == MESA_SHADER_VERTEX && so_output.register_index == VARYING_SLOT_POS) {
/* gl_Position was modified by nir_lower_clip_halfz, so we need to reverse that for streamout here:
*
* opengl gl_Position.z = (vulkan gl_Position.z * 2.0) - vulkan gl_Position.w
*
* to do this, we extract the z and w components, perform the multiply and subtract ops, then reinsert
*/
uint32_t z_component[] = {2};
uint32_t w_component[] = {3};
SpvId ftype = spirv_builder_type_float(&ctx->builder, 32);
SpvId z = spirv_builder_emit_composite_extract(&ctx->builder, ftype, src, z_component, 1);
SpvId w = spirv_builder_emit_composite_extract(&ctx->builder, ftype, src, w_component, 1);
SpvId new_z = emit_binop(ctx, SpvOpFMul, ftype, z, spirv_builder_const_float(&ctx->builder, 32, 2.0));
new_z = emit_binop(ctx, SpvOpFSub, ftype, new_z, w);
src = spirv_builder_emit_vector_insert(&ctx->builder, type, src, new_z, 2);
}
/* OpCompositeExtract can only extract scalars for our use here */
if (so_output.num_components == 1) {
result = spirv_builder_emit_composite_extract(&ctx->builder, type, src, components, so_output.num_components);
} else if (glsl_type_is_vector(out_type)) {
/* OpVectorShuffle can select vector members into a differently-sized vector */
result = spirv_builder_emit_vector_shuffle(&ctx->builder, type,
src, src,
components, so_output.num_components);
result = emit_unop(ctx, SpvOpBitcast, type, result);
} else {
/* for arrays, we need to manually extract each desired member
* and re-pack them into the desired output type
*/
for (unsigned c = 0; c < so_output.num_components; c++) {
uint32_t member[] = { so_output.start_component + c };
SpvId base_type = get_glsl_type(ctx, glsl_without_array(out_type));
if (ctx->stage == MESA_SHADER_VERTEX && so_output.register_index == VARYING_SLOT_CLIP_DIST1)
member[0] += 4;
components[c] = spirv_builder_emit_composite_extract(&ctx->builder, base_type, src, member, 1);
}
result = spirv_builder_emit_composite_construct(&ctx->builder, type, components, so_output.num_components);
}
}
spirv_builder_emit_store(&ctx->builder, so_output_var_id, result);
}
}
static SpvId
emit_binop(struct ntv_context *ctx, SpvOp op, SpvId type,
SpvId src0, SpvId src1)
@ -1988,7 +2159,7 @@ emit_cf_list(struct ntv_context *ctx, struct exec_list *list)
}
struct spirv_shader *
nir_to_spirv(struct nir_shader *s)
nir_to_spirv(struct nir_shader *s, const struct pipe_stream_output_info *so_info, struct pipe_stream_output_info *local_so_info)
{
struct spirv_shader *ret = NULL;
@ -2061,12 +2232,17 @@ nir_to_spirv(struct nir_shader *s)
ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
_mesa_key_pointer_equal);
ctx.so_outputs = _mesa_hash_table_create(NULL, _mesa_hash_u32,
_mesa_key_u32_equal);
nir_foreach_variable(var, &s->inputs)
emit_input(&ctx, var);
nir_foreach_variable(var, &s->outputs)
emit_output(&ctx, var);
if (so_info)
emit_so_info(&ctx, util_last_bit64(s->info.outputs_written), so_info, local_so_info);
nir_foreach_variable(var, &s->uniforms)
emit_uniform(&ctx, var);
@ -2078,6 +2254,11 @@ nir_to_spirv(struct nir_shader *s)
SpvExecutionModeDepthReplacing);
}
if (so_info && so_info->num_outputs) {
spirv_builder_emit_cap(&ctx.builder, SpvCapabilityTransformFeedback);
spirv_builder_emit_exec_mode(&ctx.builder, entry_point,
SpvExecutionModeXfb);
}
spirv_builder_function(&ctx.builder, entry_point, type_void,
SpvFunctionControlMaskNone,
@ -2124,6 +2305,9 @@ nir_to_spirv(struct nir_shader *s)
free(ctx.defs);
if (so_info)
emit_so_outputs(&ctx, so_info, local_so_info);
spirv_builder_return(&ctx.builder); // doesn't belong here, but whatevz
spirv_builder_function_end(&ctx.builder);
@ -2154,6 +2338,9 @@ fail:
if (ctx.vars)
_mesa_hash_table_destroy(ctx.vars, NULL);
if (ctx.so_outputs)
_mesa_hash_table_destroy(ctx.so_outputs, NULL);
return NULL;
}

View File

@ -36,9 +36,10 @@ struct spirv_shader {
};
struct nir_shader;
struct pipe_stream_output_info;
struct spirv_shader *
nir_to_spirv(struct nir_shader *s);
nir_to_spirv(struct nir_shader *s, const struct pipe_stream_output_info *so_info, struct pipe_stream_output_info *local_so_info);
void
spirv_shader_delete(struct spirv_shader *s);

View File

@ -132,15 +132,55 @@ optimize_nir(struct nir_shader *s)
} while (progress);
}
/* Tell a genuine gl_PointSize output apart from one injected by
 * nir_lower_point_size_mov.
 *
 * Returns true only when the first output variable found at
 * VARYING_SLOT_PSIZ has data.explicit_location set; returns false when it
 * doesn't, or when the shader has no PSIZ output at all.
 */
static bool
check_psiz(struct nir_shader *s)
{
   bool genuine = false;

   nir_foreach_variable(var, &s->outputs) {
      if (var->data.location != VARYING_SLOT_PSIZ)
         continue;

      /* genuine PSIZ outputs will have this set */
      genuine = var->data.explicit_location ? true : false;
      break;
   }

   return genuine;
}
/* semi-copied from iris
 *
 * Rewrites every so_info->output[].register_index in place: the incoming
 * value is treated as an index into the list of set bits of outputs_written
 * (lowest bit first), and is replaced by the bit number found there.
 *
 * have_psiz: when false, the VARYING_SLOT_PSIZ bit is left out of that list
 */
static void
update_so_info(struct pipe_stream_output_info *so_info,
               uint64_t outputs_written, bool have_psiz)
{
   uint8_t slot_to_varying[64] = {0};
   unsigned num_slots = 0;

   /* walk the written-outputs mask from the lowest bit upwards */
   for (unsigned bit = 0; bit < 64; bit++) {
      if (!(outputs_written & (UINT64_C(1) << bit)))
         continue;

      /* PSIZ from nir_lower_point_size_mov breaks stream output, so always skip it */
      if (bit == VARYING_SLOT_PSIZ && !have_psiz)
         continue;

      slot_to_varying[num_slots] = bit;
      num_slots++;
   }

   /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
   for (unsigned i = 0; i < so_info->num_outputs; i++) {
      struct pipe_stream_output *entry = &so_info->output[i];

      entry->register_index = slot_to_varying[entry->register_index];
   }
}
struct zink_shader *
zink_compile_nir(struct zink_screen *screen, struct nir_shader *nir)
zink_compile_nir(struct zink_screen *screen, struct nir_shader *nir,
const struct pipe_stream_output_info *so_info)
{
struct zink_shader *ret = CALLOC_STRUCT(zink_shader);
bool have_psiz = false;
ret->programs = _mesa_pointer_set_create(NULL);
NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, 1);
NIR_PASS_V(nir, nir_lower_clip_halfz);
if (nir->info.stage == MESA_SHADER_VERTEX)
have_psiz = check_psiz(nir);
NIR_PASS_V(nir, nir_lower_regs_to_ssa);
optimize_nir(nir);
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
@ -189,8 +229,12 @@ zink_compile_nir(struct zink_screen *screen, struct nir_shader *nir)
}
ret->info = nir->info;
if (so_info) {
memcpy(&ret->stream_output, so_info, sizeof(ret->stream_output));
update_so_info(&ret->stream_output, nir->info.outputs_written, have_psiz);
}
struct spirv_shader *spirv = nir_to_spirv(nir);
struct spirv_shader *spirv = nir_to_spirv(nir, so_info, so_info ? &ret->stream_output : NULL);
assert(spirv);
if (zink_debug & ZINK_DEBUG_SPIRV) {

View File

@ -65,7 +65,8 @@ struct zink_shader {
};
struct zink_shader *
zink_compile_nir(struct zink_screen *screen, struct nir_shader *nir);
zink_compile_nir(struct zink_screen *screen, struct nir_shader *nir,
const struct pipe_stream_output_info *so_info);
void
zink_shader_free(struct zink_screen *screen, struct zink_shader *shader);

View File

@ -296,7 +296,7 @@ zink_create_vs_state(struct pipe_context *pctx,
else
nir = (struct nir_shader *)shader->ir.nir;
return zink_compile_nir(zink_screen(pctx->screen), nir);
return zink_compile_nir(zink_screen(pctx->screen), nir, &shader->stream_output);
}
static void
@ -332,7 +332,7 @@ zink_create_fs_state(struct pipe_context *pctx,
else
nir = (struct nir_shader *)shader->ir.nir;
return zink_compile_nir(zink_screen(pctx->screen), nir);
return zink_compile_nir(zink_screen(pctx->screen), nir, NULL);
}
static void