ir3: Handle clip+cull distances

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6959>
This commit is contained in:
Connor Abbott 2020-09-24 16:01:54 +02:00 committed by Marge Bot
parent 9e063b01b7
commit 47f825ac63
2 changed files with 50 additions and 4 deletions

View File

@ -3045,12 +3045,32 @@ pack_inlocs(struct ir3_context *ctx)
unsigned actual_in = 0;
unsigned inloc = 0;
/* for clip+cull distances, unused components can't be eliminated because
* they're read by fixed-function, even if there's a hole. Note that
* clip/cull distance arrays must be declared in the FS, so we can just
* use the NIR clip/cull distances to avoid reading ucp_enables in the
* shader key.
*/
unsigned clip_cull_size =
ctx->so->shader->nir->info.clip_distance_array_size +
ctx->so->shader->nir->info.cull_distance_array_size;
unsigned clip_cull_mask = MASK(clip_cull_size);
for (unsigned i = 0; i < so->inputs_count; i++) {
unsigned compmask = 0, maxcomp = 0;
so->inputs[i].inloc = inloc;
so->inputs[i].bary = false;
if (so->inputs[i].slot == VARYING_SLOT_CLIP_DIST0 ||
so->inputs[i].slot == VARYING_SLOT_CLIP_DIST1) {
if (so->inputs[i].slot == VARYING_SLOT_CLIP_DIST0)
compmask = clip_cull_mask & 0xf;
else
compmask = clip_cull_mask >> 4;
used_components[i] = compmask;
}
for (unsigned j = 0; j < 4; j++) {
if (!(used_components[i] & (1 << j)))
continue;
@ -3319,6 +3339,14 @@ emit_instructions(struct ir3_context *ctx)
*/
ctx->so->num_samp = util_last_bit(ctx->s->info.textures_used) + ctx->s->info.num_images;
/* Save off clip+cull information. Note that in OpenGL clip planes may
* be individually enabled/disabled, so we can't use the
* clip_distance_array_size for them.
*/
ctx->so->clip_mask = ctx->so->key.ucp_enables;
ctx->so->cull_mask = MASK(ctx->s->info.cull_distance_array_size) <<
ctx->s->info.clip_distance_array_size;
/* NOTE: need to do something more clever when we support >1 fxn */
nir_foreach_register (reg, &fxn->registers) {
ir3_declare_array(ctx, reg);
@ -3362,6 +3390,13 @@ fixup_astc_srgb(struct ir3_context *ctx)
}
}
/* Tells whether an output slot is one of those kept by
 * fixup_binning_pass(): position, point size, and the two
 * clip-distance slots.  Every other slot yields false.
 */
static bool
output_slot_used_for_binning(gl_varying_slot slot)
{
	switch (slot) {
	case VARYING_SLOT_POS:
	case VARYING_SLOT_PSIZ:
	case VARYING_SLOT_CLIP_DIST0:
	case VARYING_SLOT_CLIP_DIST1:
		return true;
	default:
		return false;
	}
}
static void
fixup_binning_pass(struct ir3_context *ctx)
{
@ -3376,8 +3411,7 @@ fixup_binning_pass(struct ir3_context *ctx)
unsigned outidx = out->collect.outidx;
unsigned slot = so->outputs[outidx].slot;
/* throw away everything but first position/psize and the clip/cull distances */
if ((slot == VARYING_SLOT_POS) || (slot == VARYING_SLOT_PSIZ)) {
if (output_slot_used_for_binning(slot)) {
ir->outputs[j] = ir->outputs[i];
j++;
}
@ -3390,8 +3424,7 @@ fixup_binning_pass(struct ir3_context *ctx)
for (i = 0, j = 0; i < so->outputs_count; i++) {
unsigned slot = so->outputs[i].slot;
/* throw away everything but first position/psize and the clip/cull distances */
if ((slot == VARYING_SLOT_POS) || (slot == VARYING_SLOT_PSIZ)) {
if (output_slot_used_for_binning(slot)) {
so->outputs[j] = so->outputs[i];
/* fixup outidx to point to new output table entry: */

View File

@ -653,6 +653,8 @@ struct ir3_shader_variant {
/* Are we using split or merged register file? */
bool mergedregs;
uint8_t clip_mask, cull_mask;
/* for astc srgb workaround, the number/base of additional
* alpha tex states we need, and index of original tex states
*/
@ -863,6 +865,9 @@ struct ir3_shader_linkage {
/* location for fixed-function gl_ViewIndex passthrough */
uint8_t viewid_loc;
/* location for combined clip/cull distance arrays */
uint8_t clip0_loc, clip1_loc;
};
static inline void
@ -904,6 +909,8 @@ ir3_link_shaders(struct ir3_shader_linkage *l,
l->primid_loc = 0xff;
l->viewid_loc = 0xff;
l->clip0_loc = 0xff;
l->clip1_loc = 0xff;
while (l->cnt < ARRAY_SIZE(l->var)) {
j = ir3_next_varying(fs, j);
@ -925,6 +932,12 @@ ir3_link_shaders(struct ir3_shader_linkage *l,
l->viewid_loc = fs->inputs[j].inloc;
}
if (fs->inputs[j].slot == VARYING_SLOT_CLIP_DIST0)
l->clip0_loc = fs->inputs[j].inloc;
if (fs->inputs[j].slot == VARYING_SLOT_CLIP_DIST1)
l->clip1_loc = fs->inputs[j].inloc;
ir3_link_add(l, k >= 0 ? vs->outputs[k].regid : default_regid,
fs->inputs[j].compmask, fs->inputs[j].inloc);
}